library(tidyverse)
library(magrittr)
library(lubridate)
library(scales)
library(matrixStats)
library(ggrepel)
library(broom)
library(glue)
library(jsonlite)
library(rvest)
library(RCurl)
library(pander)
library(DT)
library(plotly)
library(cowplot)
library(QuantTools)
library(ggfortify)
library(readxl)
panderOptions("big.mark", ",")
panderOptions("table.split.table", Inf)
panderOptions("table.style", "rmarkdown")
panderOptions("missing", "")
theme_set(theme_bw())
shiftAxisLabel <- function(x, k = 2){
  x$x$layout$annotations[[2]]$x <- x$x$layout$annotations[[2]]$x*k
  x$x$layout$margin$l <- x$x$layout$margin$l*k
  x
}
# Handle updates between 12am & 12pm
dt <- Sys.Date()
if (as.numeric(format(Sys.time(), "%H")) <= 14){
  dt <- Sys.Date() - 1
}

Disclaimer: This very simple report was prepared by a bioinformatician with no experience in epidemiology or virology, and as such should be treated simply as an alternate viewpoint on the data, which I was simply unable to find elsewhere. Many other people exist with much greater expertise on this subject. However, I do hope this provides a useful perspective which is able to add constructively to the wider discussion. In addition, it should be noted that this is very much focussed on Australian data.

Data Sources

confirmed <- url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv") %>%
    read_csv() %>%
    pivot_longer(
        cols = ends_with("20"),
        names_to = "date",
        values_to = "confirmed"
    )  %>%
    mutate(
        date = str_replace_all(
            date, "(.+)/(.+)/(.+)", "20\\3-\\1-\\2"
        ) %>%
            ymd()
    ) %>%
    dplyr::rename(
        Country = `Country/Region`
    ) %>%
    dplyr::mutate(
        Country = case_when(
            `Province/State` == "Hubei" ~ "China (Hubei)",
            `Province/State` == "Hong Kong" ~ "Hong Kong",
            grepl("China", Country) & !`Province/State` %in% c("Hubei", "Hong Kong") ~ "China (Other)",
            Country == "Korea, South" ~ "South Korea",
            Country == "Congo (Kinshasa)" ~ "DR Congo",
            Country == "Congo (Brazzaville)" ~ "Congo",
            !grepl("China", Country) ~ Country
        )
    ) %>%
    dplyr::filter(
        !is.na(confirmed)
    ) %>%
  dplyr::select(-Lat, -Long)
recovered <- url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv") %>%
  read_csv() %>%
  pivot_longer(
    cols = ends_with("20"),
    names_to = "date",
    values_to = "recovered"
  )  %>%
  mutate(
    date = str_replace_all(
      date, "(.+)/(.+)/(.+)", "20\\3-\\1-\\2"
    ) %>%
      ymd()
  ) %>%
  dplyr::rename(
    Country = `Country/Region`
  ) %>%
  dplyr::mutate(
    Country = case_when(
      `Province/State` == "Hubei" ~ "China (Hubei)",
      `Province/State` == "Hong Kong" ~ "Hong Kong",
      grepl("China", Country) & !`Province/State` %in% c("Hubei", "Hong Kong") ~ "China (Other)",
      grepl("Korea, South", Country) ~ "South Korea",
      Country == "Congo (Kinshasa)" ~ "DR Congo",
      Country == "Congo (Brazzaville)" ~ "Congo",
      !grepl("China", Country) ~ Country
    )
  ) %>%
  dplyr::select(-Lat, -Long) 
deaths <- url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv") %>%
  read_csv() %>%
  pivot_longer(
    cols = ends_with("20"),
    names_to = "date",
    values_to = "deaths"
  )  %>%
  mutate(
    date = str_replace_all(
      date, "(.+)/(.+)/(.+)", "20\\3-\\1-\\2"
    ) %>%
      ymd()
  ) %>%
  dplyr::rename(
    Country = `Country/Region`
  ) %>%
  dplyr::mutate(
    Country = case_when(
      `Province/State` == "Hubei" ~ "China (Hubei)",
      `Province/State` == "Hong Kong" ~ "Hong Kong",
      grepl("China", Country) & !`Province/State` %in% c("Hubei", "Hong Kong") ~ "China (Other)",
      Country == "Korea, South" ~ "South Korea",
      Country == "Congo (Kinshasa)" ~ "DR Congo",
      Country == "Congo (Brazzaville)" ~ "Congo",      
      !grepl("China", Country) ~ Country
    )
  ) %>%
  dplyr::select(-Lat, -Long) 
predRecovered <- confirmed %>%
  rename(
    recovered = confirmed
  ) %>%
  mutate(
    date = date + 21
  ) %>%
  left_join(deaths) %>%
  mutate(recovered = recovered - deaths) %>%
  dplyr::select(
    `Province/State`, Country, date,recovered
  )
wikiPops <- read_tsv("wikiPops.tsv") %>%
  mutate_at(c("Region", "Continent"), as.factor)
alwaysShow <- c("Australia", "New Zealand", "US", "Canada", "United Kingdom", "Taiwan*", "Singapore", "South Korea", "China (Other)", "China (Hubei)", "Hong Kong", "Japan", "Ireland", "Russia", "Brazil", "Kuwait", "Finland", "Norway", "India")
countryCols <- confirmed %>% 
  dplyr::filter(confirmed > 0) %>% 
  arrange(date) %>% 
  distinct(Country) %>% 
  left_join(wikiPops) %>%
  mutate(
    Region = str_replace_all(
      Region, "(Southern|Western) Europe", "Southern & Western Europe"
    ),
    Region = str_replace_all(
      Region, "(Central|Southern) Asia", "Central & Southern Asia"
    ),
    Region = str_replace_all(
      Region, "(Central America|Northern America|Caribbean)", "Caribbean, Central & Northern America"
    ),
    Region = str_replace_all(
      Region, "(Eastern|Middle|Southern) Africa", "Eastern, Middle & Southern Africa"
    )
  ) %>%
  droplevels() %>%
  split(f = .$Region) %>%
  lapply(
    function(x){
      mutate(x, rgb = hue_pal()(nrow(x)))
      }
    ) %>%
  bind_rows() %>%
  with(structure(rgb, names = Country)) 

Data for confirmed cases, recoveries and fatalities was primarily sourced from Johns Hopkins University, using the datasets provided at https://github.com/CSSEGISandData/COVID-19. JHU data is now updated every 24 hours at approximately 2:30PM in Adelaide. As such no accurate, daily updates for international data can be produced until after that time. Importantly, dates associated with confirmed cases from this data source, may differ from dates associated with confirmed cases from Australian sources. For example, cases reported in the morning by Australian states may be assigned to the previous day in US data sources.

Population sizes were obtained from 2019 UN Estimates. Given the disparity of infection within China, China was broken into Hubei Province and the rest of China, with Hong Kong and Taiwan already being considered as separate countries in all datasets. Population estimates for Hubei Province were taken from the 2018 estimates given by Statista.com and this is likely to be a very slight underestimate.

However, all countries are likely to contain many unreported cases given the incomplete testing regimes in place for most countries. Similarly, reporting in many countries may have features that cause concerns regarding data integrity and this makes comparison across countries difficult. Information on recovered cases has been difficult to accurately obtain due inconsistent methods for considering a case as recovered, and lack of reporting for these cases in many jurisdictions.

Australian Plots Have Moved

All Australian specific data has been moved to a separate page for simpler management.

startingPoint <- 4
minPop <- 2e6

Regional Data

Cumulative Infection Rates

Before exploring individual countries, a regional perspective may be helpful. Importantly, only Oceania, Eastern Asia, Western Europe and Southern Europe have successfully controlled the infection rate. Most other regions are still experiencing exponential growth.

ggplotly(
  confirmed %>% 
    inner_join(wikiPops) %>%
    mutate(
      Continent = str_replace_all(Continent, "(Oceania|Asia)", "Asia & Oceania")
    ) %>%
    group_by(Continent, Region, Country, Population, date) %>% 
    summarise(confirmed = sum(confirmed)) %>% 
    ungroup() %>%
    group_by(Continent, Region, date) %>% 
    summarise(rate = round(1e6*sum(confirmed)/sum(Population), 2)) %>% 
    dplyr::filter(
      date < dt, 
      date > "2020-02-18", 
      rate > startingPoint
      # Region != "Eastern Asia"
    ) %>%
    ggplot(aes(date, rate, colour = Region)) +  
    geom_line() +
    scale_y_log10(label = comma_format(1)) + 
    facet_wrap(~Continent, ncol = 2) +
    labs(x = "Date", y = "Cumulative Infection Rate (Confirmed Cases / Million)")
) %>%
  shiftAxisLabel(k = 1.5)

Current regional cumulative infection rates. Lines are shown from the point that regional cases exceeded 4 confirmed cases / million. Rates are calculated within each region using regional populations.

Daily New Infection Rates

ggplotly(
  confirmed %>% 
    inner_join(wikiPops) %>%
    mutate(
      Continent = str_replace_all(Continent, "(Oceania|Asia)", "Asia & Oceania")
    ) %>%
    group_by(Continent, Region, Country, Population, date) %>% 
    summarise(confirmed = sum(confirmed)) %>% 
    ungroup() %>%
    group_by(Continent, date) %>%
    mutate(Population = sum(Population)) %>%
    ungroup() %>%
    group_by(Continent, Region, date) %>%
    summarise(
      confirmed = sum(confirmed),
      Population = unique(Population)
    ) %>%
    mutate(
      daily = c(0, diff(confirmed))
    ) %>%
    ungroup() %>%
    dplyr::filter(
      daily > 0 | confirmed > 0,
      date < dt
    ) %>%
    mutate(
      `Daily Rate` = round(1e6 * daily / Population, 2)
    ) %>%
    rename(Date = date) %>%
    ggplot(
      aes(Date, `Daily Rate`, fill = Region)
    ) +
    geom_bar(stat = "identity") +
    geom_line(
      aes(Date, MA),
      data = . %>%
        group_by(Continent, Date) %>%
        summarise(
          `Daily Rate` = sum(`Daily Rate`)
        ) %>%
        mutate(MA = sma(`Daily Rate`, 7)),
      inherit.aes = FALSE
    ) +
  scale_colour_manual(values = c(NA, "black")) +
    scale_x_date(expand = expansion(c(0, 0.03))) +
    scale_y_continuous(expand = expansion(c(0, 0.05))) +
    facet_wrap(~Continent, ncol = 2, scales = "free") +
    labs(
      x = "Date",
      y = "Daily New Infection Rate (cases / million)"
    ),
  tooltip  = c("Date", "Daily Rate", "Region")
) %>%
  shiftAxisLabel(k = 3)

Daily new confirmed infections, shown as cases / million. Values are calculated within the total population for each continent. The solid black line is the 7-day simple moving average calculated within each continent

Infection Rates

For this section, most data is presented relative to population size. Growth in infection rates is only shown after the point at which the cumulative confirmed infection rate breached 4 confirmed cases / million. This equates to about 101 confirmed cases within Australia, and is broadly comparable to the “Days since passing 100 confirmed cases” commonly shown elsewhere.

Most plots in this section have been filtered for countries with a population, cases or fatalities larger than a given value. Due to their significantly strong or poor performances, a handful of countries have been specified to be always included regardless of this filter. These countries are Australia, Brazil, Canada, China (Hubei), China (Other), Finland, Hong Kong, India, Ireland, Japan, Kuwait, New Zealand, Norway, Russia, Singapore, South Korea, Taiwan, US and United Kingdom

  • At the time of report preparation, the total number of confirmed cases, including all countries for which data has been made available, is 27,332,433.
  • The time period for the most recent doubling of global cases was 54 days.
  • The last million cases were confirmed over the last 4 days.

Table of Most Impacted Countries

Confirmed cases in this table are effectively the cumulative, confirmed incidence rate. Recovered patients and those who have passed away are still included in these numbers.

confirmed %>%
  group_by(Country, date) %>%
  summarise(confirmed = sum(confirmed)) %>%
  ungroup() %>%
  group_by(Country) %>%
  dplyr::filter(
    date == max(date),
  ) %>%
  ungroup() %>%
  inner_join(wikiPops) %>%
  mutate(
    rate = 1e6*confirmed / Population,
    occurrence = Population / confirmed,
    Population = round(Population, -3) / 1e6
  ) %>%
  dplyr::filter(rate >= 1) %>%
  arrange(desc(rate)) %>%
  rename_at(vars(everything()), str_to_title) %>%
  dplyr::select(
    Continent, Region, Country,
    Date, Confirmed, 
    `Population (millions)` = Population, 
    `Rate (Cases per Million)` = Rate,
    Occurrence
  ) %>%
  datatable(
    options = list(
      pageLength = 25, 
      autoWidth = TRUE,
      searchCols = list(
        NULL, NULL, NULL, NULL, NULL,
        list(
          search = glue(
            '{minPop/1e6 + 0.001} ... {max(wikiPops$Population/1e6)}'
          )
        ),
        NULL
      )
    ),
    filter = 'top',
    class = "stripe",
    rownames = FALSE,
    caption = paste(
      "The most impacted countries studied here and shown as a proportion of total population.",
      "The initial filter is set so that only countries with a population greater than", comma(minPop), "are shown.",
      "All fields are searchable and sortable.",
      "To filter numeric columns, either use the slider or enter the values in the form 'min ... max'.",
      "To filter text columns, partial matching used in a case-insensitive manner.",
      "Populations have been rounded to the nearest thousand to make reading values easier.",
      "'Rate' represents the latest confirmed infection rate as cumulative cases per million people, whilst 'Occurrence' represents the number of people expected before one case is found, assuming an even distribution amongst the population.",
      "In other words, one in every 'Occurrence' people within the population have been confirmed to have contracted COVID-19.",
      "Occurrence is inversely proportional to Rate.",
      "No adjustment has been made in this table for patients who have recovered or passed away.",
      "Whilst the virus spreads with no regard to population size, the rate as shown here indicates the degree of stress which each country's health-care system is likely to be experiencing.",
      "Several countries shown here have not attracted much media attention due lower case numbers than China and Italy, but are likely to be experiencing significant duress.",
      "Continent and Region information is as provided by the UN classifications."
    )
  ) %>%
  formatCurrency(
    columns = c("Population (millions)"),
    currency = "", 
    digits = 3,
    mark = ","
  ) %>%
  formatCurrency(
    columns = c("Confirmed", "Rate (Cases per Million)", "Occurrence"),
    currency = "", 
    digits = 0,
    mark = ","
  )

Cumulative Incidence Rates

ausDays <- confirmed %>% 
  dplyr::filter(Country == "Australia") %>%
  group_by(Country, date) %>%
  summarise_at(vars(confirmed), sum) %>%
  left_join(wikiPops) %>%
  mutate(rate = 1e6 * confirmed / Population) %>%
  dplyr::filter(rate > startingPoint) %>%
  nrow()
minDays <- ausDays - 30
# Use Singapore as that has the longest dataset besides Hubei
nDays <- confirmed %>%
  dplyr::filter(Country == "Singapore") %>% 
  group_by(Country, date) %>%
  summarise(confirmed = sum(confirmed)) %>%
  ungroup() %>%
  left_join(wikiPops) %>%
  mutate(
    rate = 1e6*confirmed / Population
  ) %>% 
  dplyr::filter(rate > startingPoint) %>% 
  nrow() %>%
  subtract(1)
refRate <- c(2, 4, 8)
minPop <- 8e6
p <- confirmed %>%
  group_by(Country, date) %>%
  summarise(confirmed = sum(confirmed)) %>%
  ungroup() %>%
  inner_join(
    dplyr::filter(
      wikiPops, Population > minPop | Country %in% alwaysShow
    )
  ) %>%
  mutate(
    rate = 1e6*confirmed / Population
  ) %>%
  dplyr::filter(
    rate > startingPoint
  ) %>%
  group_by(Country) %>%
  mutate(
    days = date - min(date)
  ) %>%
  dplyr::filter(
    max(days) >= minDays | Country %in% alwaysShow
  ) %>%
  ungroup() %>%
  mutate(
    days = as.integer(days),
    rate = round(rate, 2)
  ) %>%
  dplyr::filter(days <= nDays | Country %in% alwaysShow) %>%
  arrange(date) %>%
  mutate(
    Region = str_replace_all(
      Region, "(Southern|Western) Europe", "Southern & Western Europe"
    ),
    Region = str_replace_all(
      Region, "(Central|Southern) Asia", "Central & Southern Asia"
    ),
    Region = str_replace_all(
      Region, "(Central America|Northern America|Caribbean)", "Caribbean, Central & Northern America"
    ),
    Region = str_replace_all(
      Region, "(Eastern|Middle|Southern) Africa", "Eastern, Middle & Southern Africa"
    ),
    Country = fct_inorder(Country),
    Region = fct_lump(Region, n = 11)
    ) %>%
  rename_all(str_to_title) %>%
  mutate(ymax = max(Rate)) %>%
  ggplot(
    aes(Days, Rate, colour = Country, Date = Date, Confirmed = Confirmed)
  ) +
  geom_segment(
    aes(x, y, xend = xmax, yend = ymax),
    data = . %>%
      dplyr::slice(seq_along(refRate)) %>%
      dplyr::select(ymax) %>%
      mutate(
        ymax = ymax*1.2,
        x = 0,
        y = startingPoint,
        xmax =  refRate*log2(ymax / startingPoint)
      ),
    inherit.aes = FALSE,
    colour = "grey80",
    linetype = 2
  ) +
  geom_line() +
  scale_x_continuous(
    expand = expansion(mult = c(0, 0.05)),
  ) +
  scale_y_log10(
    expand = expansion(mult = c(0, 0.05)),
    label = comma
  ) +
  scale_colour_manual(values = countryCols) +
  xlab(
    paste(
      "Days since passing", 
      startingPoint,
      "confirmed cases/million"
    )
  ) +
  ylab("Confirmed Cumulative Infection Rate (cases/million)") +
  facet_wrap(~Region, ncol = 3)
ggplotly(
  p, 
  tooltip = c(
    "Days", "Rate", "Country", "Date", "Confirmed"
  )) %>%
  shiftAxisLabel(k = 1.5)

COVID-19 Confirmed Cumulative Infection Rate for countries which have exceeded 4 confirmed cases/million for 152 or more days, and with populations greater than 8,000,000, apart from a small number of specifically included countries. Data is only shown for the first 216 calendar days since passing 4 confirmed cases/million. Note that from the day records begin in this dataset (2020-01-22), the confirmed infection rate in Hubei was 7.5 confirmed cases/million. Diagonal grey lines indicate a doubling in the infection rate every 2, 4, or 8 days. To hide a country, click on the country in the plot legend. Clicking again on the country in the legend will restore the data within the plot. Countries are shown in order of the date at which they passed the 4 confirmed case/million mark. Regions are as defined by the UN with some regions lumped together if only a few data points were available. Due to the large number of countries shown, you may need to scroll through the legend. Regions of the plot are also able to be zoomed interactively. Please note the y-axis is shown on the logarithmic scale, so that a series of points which appear to be diagonal will indicate exponential growth. The flatter the line, the slower the growth and a perfectly horizontal line would indicate zero growth, or no new confirmed cases.

Daily New Cases

minPop <- 5e6
minRate <- 12
ggplotly(
  confirmed %>%
    group_by(Country, date) %>%
    summarise(confirmed = sum(confirmed)) %>%
    mutate(
      `daily total` = c(0, diff(confirmed)),
      daily_ma = sma(`daily total`, 7)
    ) %>%
    ungroup() %>%
    dplyr::filter(`daily total` > 0 | confirmed > 0) %>%
    inner_join(
      dplyr::filter(
        wikiPops, Population > minPop | Country %in% alwaysShow
      )
    ) %>%
    mutate(
      `Daily Rate` = round(1e6 * daily_ma / Population, 2),
      Region = str_replace_all(
        Region, "(Central|Southern) Asia", "Central & Southern Asia"
      ),
      # Region = str_replace_all(
      #   Region, "(Eastern|South-eastern) Asia", "Eastern & South-Eastern Asia"
      # ),
      Region = str_replace_all(
        Region, "(Northern America|Caribbean)", "Caribbean & Northern America"
      ),
      Region = str_replace_all(Region, "(.+) Africa", "Africa (All Regions)"),
      Region = fct_lump(Region, n = 11)
    ) %>%
    group_by(Country) %>%
    dplyr::filter(
      date > "2020-03-01",
      date < dt,
      max(`Daily Rate`) > minRate | Country %in% alwaysShow
    ) %>%
    ungroup() %>%
    rename_all(str_to_title) %>%
    mutate(
      `Population (millions)` = round(Population / 1e6, 2)
    ) %>%
    ggplot(
      aes(Date, `Daily Rate`, colour = Country, label = `Daily Total`, key = `Population (millions)`)
    ) +
    geom_line() +
    facet_wrap(
      ~Region, ncol = 3, scales = "free_y"
    ) +
    labs(
      y = "Daily New Confirmed Cases (per million)"
    ) +
    scale_x_date(expand = expansion(c(0, 0.03))) +
    scale_y_continuous(expand = expansion(c(0, 0.05))) +
    scale_colour_manual(values = countryCols) 
) %>%
  shiftAxisLabel()

Daily New Confirmed Cases (per million) using each country’s population to calculate the case rate. 7 day Simple Moving Averages are used to plot daily case rates in order to minimise the impact of single-day spikes and irregular reporting. Daily Totals provided when hovering are the reported number of cases for that specific day, without smoothing or scaling. Only countries with more than 4,000,000 people and where the daily rate exceeded 12 cases/million are shown.

Daily Increase Vs Confirmed Cases

An alternate viewpoint on the data is to remove time and inspect the relationship between the daily increase in cases and the total number of cases. When this relationship ceases it’s near linear relationship, this can be a sign the control measures have begun to take effect. Whilst this relationship appears to have broken down for Australia, no breakdown has yet occurred for countries such as South Africa, USA and Brazil, with these countries explicitly still in, or having resumed the exponential growth phase.

minPop <- 8e6
minDays <- 10
minRate <- 50
plotIncVConf <- confirmed %>%
  group_by(Country, date) %>%
  summarise_at(vars(confirmed), sum) %>%
  dplyr::filter(confirmed > 0) %>%
  inner_join(
    dplyr::filter(wikiPops, Population > minPop | Country %in% alwaysShow)
  ) %>%
  mutate(
    rate = 1e6 * confirmed / Population
  ) %>%
  split(f = .$Country) %>% 
  lapply(function(x, n = 7){
    x %>%
      mutate(
        d = c(0, diff(rate))
      ) %>%
      mutate_at(
        vars(d), sma, n = n
      ) %>%
      dplyr::filter(
        !is.na(d),
        rate > minRate
      ) %>%
      mutate(
        nDays = nrow(.)
      )
  }
  ) %>%
  bind_rows() %>%
  ungroup() %>%
  dplyr::filter(
    nDays > minDays,
    d > 0.01
  ) %>%
  arrange(date) %>%
  mutate(
    Region = str_replace_all(
      Region, "(Southern|Western) Europe", "Southern & Western Europe"
    ),
    Region = str_replace_all(
      Region, "(Central|Southern) Asia", "Central & Southern Asia"
    ),
    Region = str_replace_all(
      Region, "(Central America|Northern America|Caribbean)", "Caribbean, Central & Northern America"
    ),
    Region = str_replace_all(
      Region, "(Eastern|Middle|Southern) Africa", "Eastern, Middle & Southern Africa"
    ),
    Country = fct_inorder(Country),
    rate = round(rate, 2),
    d = round(d, 3),
    Population = comma(round(Population, -3)),
    Region = fct_lump(Region, n = 11)
  ) %>%
  rename(
    Rate = rate,
    `Average Daily Increase` = d,
    Date = date
  ) %>%
  ggplot(
    aes(Rate, `Average Daily Increase`, colour = Country, label = Population, key = Date)
  ) +
  geom_line() +
  scale_y_log10(
    name = "Average Daily Increase (Cases / Million)",
    label = label_comma(accuracy = 0.1)) +
  scale_x_log10(
    name = "Cumulative Confirmed Infection Rate (Cases / Million)"
  ) +
  scale_colour_manual(values = countryCols) +
  facet_wrap(~Region, ncol = 3) 
capIncVConf <- glue(
  "*Daily Increase in Cases plotted against Confirmed Cases, using confirmed cases / million.
  These two values are directly proportional until interventions are successful, at which point the proportional relationship changes, as evidenced by a sudden downwards turn.
  Scaling by population aids in the visualisation of where in the relative infection trajectory each country's control measures have begun to take effect.
  Daily increases are shown using a 7-day simple moving average in order to minimise the impact of day-to-day variation.
  Countries are only shown from the point at which the moving average exceeds {minRate} cases/million, and have exceeded this value for > {minDays} days.
  Regions are as defined by the UN with some combined for convenience if only a small number of countries were available.
  Data is additionally restricted to countries with a population > {comma(minPop)}.*
  "
)
ggplotly(plotIncVConf +
  coord_cartesian(
    ylim = c(0.5, max(plotIncVConf$data$`Average Daily Increase`)))
) %>%
  shiftAxisLabel(1.5)

Daily Increase in Cases plotted against Confirmed Cases, using confirmed cases / million. These two values are directly proportional until interventions are successful, at which point the proportional relationship changes, as evidenced by a sudden downwards turn. Scaling by population aids in the visualisation of where in the relative infection trajectory each country’s control measures have begun to take effect. Daily increases are shown using a 7-day simple moving average in order to minimise the impact of day-to-day variation. Countries are only shown from the point at which the moving average exceeds 50 cases/million, and have exceeded this value for > 10 days. Regions are as defined by the UN with some combined for convenience if only a small number of countries were available. Data is additionally restricted to countries with a population > 8,000,000.

Comparison of Total Confirmed Infections

minPop <- 4e6
totalConf <- confirmed %>% 
  group_by(Country) %>%
  dplyr::filter(date == max(date)) %>%
  ungroup() %>%
  summarise_at(vars(confirmed), sum) %>%
  pull(confirmed)
n <- 15
confirmed %>%
  group_by(Country) %>%
  dplyr::filter(date == max(date)) %>%
  group_by(Country, date) %>%
  summarise_at(vars(confirmed), sum) %>%
  ungroup() %>%
  inner_join(wikiPops) %>%
  dplyr::filter(Population > minPop) %>%
  mutate(
    `Infection Rate` = 1e6*confirmed / Population,
    TotalRank = rank(1/confirmed),
    RateRank = rank(1/`Infection Rate`),
    AveRank = (TotalRank + RateRank) / 2,
    topRank = RateRank <= n | TotalRank <= n
  ) %>%
  arrange(desc(RateRank)) %>%
  mutate(
    Country = fct_inorder(Country)
  ) %>%
  dplyr::filter(topRank) %>%
  droplevels() %>%
  mutate(
    Continent = fct_inorder(as.character(Continent))
  ) %>%
  dplyr::select(-contains("Rank")) %>%
  pivot_longer(
    cols = c(confirmed, `Infection Rate`),
    names_to = "name",
    values_to = "value"
  ) %>%
  mutate(
    lab_y = case_when(
      name == "confirmed" ~ value + 8e5,
      name != "confirmed" ~ value + 2e3
    )
  ) %>%
  ggplot(aes(Country, value, fill = Country)) +
  geom_col(colour = "black") +
  geom_label(
    aes(y = lab_y, label = comma_format(1)(value)), 
    alpha = 0.3
  ) +
  facet_grid(
    Continent~name, 
    scales = "free", space = "free_y", 
    labeller = as_labeller(
      c(
        `Infection Rate` = "Confirmed Cases / Million",
        confirmed = "Total Confirmed Cases",
        structure(levels(wikiPops$Continent), names = levels(wikiPops$Continent))
      )
    ), 
    switch = "x"
  ) +
  coord_flip() +
  scale_fill_viridis_d(option = "magma") +
  scale_y_continuous(labels = comma, expand = expansion(c(0, 0.1))) +
  theme(
    axis.title = element_blank(),
    legend.position = "none",
    panel.spacing.x = unit(0.02, "npc"),
    strip.text = element_text(face = "bold"),
    strip.placement = "outside"
  )
*Most impacted countries when combining rankings across both total confirmed cases, and total confirmed cases/million. Only countries with a population greater than 4,000,000 are shown. At the time of preparation, the total number of global infections stands at 27,332,433.*

Most impacted countries when combining rankings across both total confirmed cases, and total confirmed cases/million. Only countries with a population greater than 4,000,000 are shown. At the time of preparation, the total number of global infections stands at 27,332,433.

Fatalities

fr <- confirmed %>%
  inner_join(deaths) %>%
  group_by(Country) %>%
  dplyr::filter(date == max(date)) %>%
  ungroup() %>%
  summarise(fr = sum(deaths) / sum(confirmed)) %>%
  .[["fr"]]
days2death <- 21
offsetFr <- confirmed %>%
  mutate(date = date + days2death) %>%
  inner_join(deaths) %>%
  group_by(Country) %>%
  dplyr::filter(date == max(date)) %>%
  ungroup() %>%
  summarise(fr = sum(deaths) / sum(confirmed)) %>%
  .[["fr"]]
  • At the time of report preparation the total number of reported global fatalities is 892,443.
  • The current fatality rate from all confirmed cases is 3.3%. This may be a function of under-reporting of true cases and is very likely to be an overestimate.
  • Given that mortality from COVID-19 is a reflection of the number of cases contracted in the past, an offset of 21 days (as the median time from contraction of the virus to mortality) was used to obtain more accurate figures. Using this approach the fatality rate is closer to 4.1%

Table of Fatalities

minPop <- 4e6
minCases <- 1000
deaths %>%
  left_join(confirmed) %>%
  group_by(Country, date) %>%
  summarise_at(vars(deaths, confirmed), sum) %>%
  dplyr::filter(deaths > 0) %>%
  left_join(wikiPops) %>%
  ungroup() %>%
  mutate(
    infectionRate = round(1e6 * confirmed / Population, 1),
    fatalityRate = deaths / confirmed,
    fpm = round(1e6 * deaths / Population, 1),
    Population = round(Population / 1e6, 3)
  ) %>%
  arrange(desc(date), fatalityRate) %>%
  distinct(Country, .keep_all = TRUE) %>%
  arrange(desc(fpm)) %>%
  dplyr::select(
    Continent, Region, Country, Population,
    `Confirmed Cases` = confirmed, 
    `Cases / Million` = infectionRate,
    `Total Fatalities` = deaths, 
    `Fatalities / Million` = fpm,
    `% Fatal Infections` = fatalityRate
  ) %>%
  datatable(
    options = list(
      pageLength = 25, 
      autoWidth = TRUE,
      searchCols = list(
        NULL, NULL, NULL, 
        list(
          search = glue(
            '{minPop/1e6} ... {max(wikiPops$Population/1e6)}'
          )
        ),
        list(
          search = glue(
            '{minCases} ... {max(.$`Confirmed Cases`)}'
          )
        ),
        NULL, NULL, NULL
      )
    ),
    filter = 'top',
    class = "stripe",
    rownames = FALSE,
    caption = glue(
      "All countries with recorded fatalities, sorted by default in decreasing order of the Fatality Rate.
      The Fatality Rate simply indicates the number of confirmed cases which end in a fatality.
      The Infection Rate represents the cumulative number of cases confirmed within the country, per million people.
      All columns are searchable and filters are set by default to exclude countries with populations below {comma(minPop)} and those with fewer than {minCases} confirmed cases."
    )
  ) %>%
  formatRound(
    columns = "Cases / Million",
    mark = ",",
    digits = 1
  ) %>%
  formatRound(
    columns = "Total Fatalities",
    mark = ",",
    digits = 0
  ) %>%
  formatPercentage(
    columns = "% Fatal Infections",
    digits = 2
  ) 

Cumulative Fatalities (Scaled)

minRate <- 2
minDays <- 24
minPop <- 5e6
scaledDeathPlot <- deaths %>% 
  group_by(Country, date) %>% 
  summarise_at(vars(deaths), sum) %>%
  ungroup() %>%
  dplyr::filter(deaths > 0) %>%
  right_join(
    wikiPops %>%
      dplyr::filter(Population > minPop | Country %in% alwaysShow)
  ) %>%
  mutate(
    rate = round(1e6 * deaths / Population, 2)
  ) %>%
  dplyr::filter(rate > minRate) %>%
  group_by(Country) %>%
  mutate(Days = as.integer(date - min(date))) %>%
  dplyr::filter(
    max(Days) > minDays | Country %in% alwaysShow
  ) %>%
  arrange(desc(Days)) %>%
  ungroup() %>%
  mutate(
    Region = str_replace_all(
      Region, "(Southern|Western) Europe", "Southern & Western Europe"
    ),
    Region = str_replace_all(
      Region, "(Central|Southern) Asia", "Central & Southern Asia"
    ),
    Region = str_replace_all(
      Region, "(Central America|Northern America|Caribbean)", "Caribbean, Central & Northern America"
    ),
    Region = str_replace_all(
      Region, "(Eastern|Middle|Southern) Africa", "Eastern, Middle & Southern Africa"
    ),
    Country = fct_inorder(Country),
    Region = fct_lump(Region, n = 11),
    `One Death Every` = round(Population / deaths, 0),
    deaths = comma(deaths),
    `Population (millions)` = round(Population / 1e6, 1)
  )  %>%
  rename_all(str_to_title) %>%
  rename_all(str_replace_all, pattern = "Mill", replacement = "mill") %>%
  ggplot(
    aes(
      x = Days, y = Rate,  colour = Country, key = Date, 
      b = `Population (millions)`, label = Deaths, 
      family = `One Death Every`
    )
  ) +
  geom_line() +
  scale_y_log10() +
  scale_colour_manual(values = countryCols) +
  labs(
    x = glue("Days Since Passing {minRate} Deaths / Million"),
    y = "Deaths / Million"
  ) +
  facet_wrap(~Region, ncol = 3, scales = "free_y")
scaledDeathPlot %>%
  ggplotly(
    tooltip = c("Country", "Date", "Rate", "Deaths", "One Death Every", "Population (millions)")
  ) %>%
  shiftAxisLabel(1.5)

Cumulative fatalities for all countries with a population greater than 5 million, who passed 2 deaths / million more than 24 days ago. In the legend at right, countries are shown in order of passing 2 deaths / million. Regions are shown as defined by the UN, however some with fewer points may have been merged for convenience.

Daily Fatalities

minPop <- 5e6
minRate <- 0.1
n <- 7
ggplotly(
  deaths %>%
    group_by(Country, date) %>%
    summarise_at(vars(deaths), sum) %>%
    mutate(
      daily = c(0, diff(deaths)),
      MA = sma(daily, n)
    ) %>%
    inner_join(wikiPops) %>%
    ungroup() %>%
    dplyr::filter(
      Population > minPop | Country %in% alwaysShow,
      !is.na(MA),
      deaths > 0
    ) %>%
    arrange(date) %>%
    mutate(
      rate = round(1e6 * MA / Population, 2),
      Region = as.character(Region),
      Region = str_replace_all(
        Region, "(Central|Southern) Asia", "Central & Southern Asia"
      ),
      Region = str_replace_all(
        Region, "(Eastern|South-eastern) Asia", "Eastern & South-Eastern Asia"
      ),
      Region = str_replace_all(
        Region, "(Central America|Northern America|Caribbean)", "Caribbean, Central & Northern America"
      ),
      Region = str_replace_all(
        Region, "(Western|Northern) Africa", "Western & Northern Africa"
      ),
      Region = str_replace_all(
        Region, "(Eastern|Middle|Southern) Africa", "Eastern, Middle & Southern Africa"
      ),
      Region = fct_lump(Region, n = 11),
      Country = fct_inorder(Country),
      `Population (millions)` = round(Population / 1e6, 2)
    ) %>%
    group_by(Country) %>%
    dplyr::filter(max(rate) > minRate) %>%
    dplyr::rename(
      Date = date,
      `Daily Fatility Rate` = rate,
      `Actual Daily Total` = daily
    ) %>%
    ggplot(
      aes(Date, `Daily Fatility Rate`, colour = Country, label = `Population (millions)`, key = `Actual Daily Total`)
    ) +
    geom_line() +
    facet_wrap(~Region, ncol = 3, scales = "free_y") +
    scale_colour_manual(values = countryCols) +
    labs(
      y = "Average Daily Fatalities (per million)"
    )
) %>%
  shiftAxisLabel(1.5)

Average daily fatality rate (per million) using a 7-day moving average. Only countries with a population greater than 5,000,000 and a daily fatality rate that has exceeded 0.1 deaths/million are included. Clicking the Autoscale icon can be very helpful for this visualisation.

Daily Increase in Fatalities Vs Confirmed Fatalities

minPop <- 4e6
minDays <- 14
minRate <- 1
minD <- 0.45
dailyVsTotalDeaths <- deaths %>%
  group_by(Country, date) %>%
  summarise_at(vars(deaths), sum) %>%
  dplyr::filter(deaths > 0) %>%
  split(f = .$Country) %>%
  lapply(function(x, n = 7){
    x %>%
      mutate(
        d = c(0, diff(deaths))
      ) %>%
      mutate_at(
        vars(deaths, d), sma, n = n
      )
  }
  ) %>%
  bind_rows() %>%
  inner_join(wikiPops) %>%
  mutate(
    deaths = 1e6 * deaths / Population,
    d = 1e6 * d / Population,
    max_d = max(d, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  dplyr::filter(!is.na(deaths), !is.na(d)) %>%
  mutate_at(vars(deaths, d), round, 2) %>%
  dplyr::filter(
    Population > minPop | Country %in% alwaysShow,
    max_d > minD | Country %in% alwaysShow,
    deaths > minRate,
    d > 0
  ) %>%
  group_by(Country) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  dplyr::filter(n > minDays | Country %in% alwaysShow) %>%
  rename_all(str_to_title) %>%
  rename(
    `Daily Fatalities` = D,
    `Total Fatalities` = Deaths
  ) %>%
  arrange(Date) %>%
  mutate(
    Population = comma(round(Population, -3)),
    Country = fct_inorder(Country),
    Region = str_replace_all(Region, "(Caribbean|Northern America)", "Caribbean & Northern America"),
    Region = fct_lump(Region, n = 11)
    ) %>%
  ggplot(aes(`Total Fatalities`, `Daily Fatalities`, colour = Country, label = Date, key = Population)) +
  geom_line() +
  scale_x_log10(labels = label_comma(accuracy = 1)) +
  scale_y_log10(labels = label_comma(accuracy = 0.01), limits = c(0.01, NA)) +
  scale_colour_manual(values = countryCols) +
  facet_wrap(~Region, ncol = 3, scales = "free_y") +
  labs(
    x = "Total Fatalities (per million)",
    y = "Daily Fatalities (per million)"
  )
capDailyVsTotal <- glue(
  "*Daily Fatalities plotted against Total Fatalities, scaled by population size using Fatalities / Million.
  These two values are directly proportional until interventions are successful, at which point the proportional relationship changes, as evidenced by a sudden downwards turn.
  Values shown are 7-day simple moving averages in order to minimise the impact of day-to-day variation.
  Countries are only shown from the point at which the moving average exceeds {minRate} fatality per million people, and has exceeded this value for > {minDays} days.
  Data is additionally restricted to countries with a population > {comma(minPop)}, and those who at one point were recording > {minD} daily fatalities / million.
  Importantly, __due to the time taken from the initial infection to the day of death, this is a lag indicator of the control of infection__.*
  "
)
ggplotly(
  dailyVsTotalDeaths 
) %>%
  shiftAxisLabel(1.6)

Daily Fatalities plotted against Total Fatalities, scaled by population size using Fatalities / Million. These two values are directly proportional until interventions are successful, at which point the proportional relationship changes, as evidenced by a sudden downwards turn. Values shown are 7-day simple moving averages in order to minimise the impact of day-to-day variation. Countries are only shown from the point at which the moving average exceeds 1 fatality per million people, and has exceeded this value for > 14 days. Data is additionally restricted to countries with a population > 4,000,000, and those who at one point were recording > 0.45 daily fatalities / million. Importantly, due to the time taken from the initial infection to the day of death, this is a lag indicator of the control of infection.

Comparison of Total Fatalities

n <- 20
topDeaths <- deaths %>%
  dplyr::filter(deaths > 0) %>%
  group_by(Country, date) %>%
  summarise_at(vars(deaths), sum) %>%
  dplyr::filter(
    deaths == max(deaths),
    date == max(date)
  ) %>%
  ungroup() %>%
  inner_join(wikiPops) %>%
    dplyr::filter(Population > minPop) %>%
  mutate(
    rate = 1e6*deaths / Population,
    totalRank = nrow(.) - rank(deaths) + 1,
    rateRank = nrow(.) - rank(rate) + 1,
    aveRank = (totalRank + rateRank)/2,
    topRank = totalRank <= n | rateRank <= n
    ) %>%
  arrange(aveRank, desc(Population)) %>%
  dplyr::filter(topRank) %>%
  # dplyr::slice(1:nCnt) %>%
  arrange(desc(rateRank)) %>%
  mutate(Country = fct_inorder(Country)) %>%
  arrange(rateRank) %>%
  mutate(Continent = fct_inorder(as.character(Continent)))
a <- topDeaths %>%
  ggplot(aes(Country, rate, fill = Country)) +
  geom_col(colour = "black") +
  geom_label(
    aes(y = rate + 50, label = round(rate, 0)),
    fill = "white", alpha = 0.5
  ) +
  scale_fill_viridis_d(option = "magma", direction = 1) +
  scale_y_continuous(expand = expansion(c(0, 0.08))) +
  labs(y = "Fatalities / Million") +
  coord_flip() +
  facet_grid(Continent~., scales = "free_y", space = "free_y") +
  theme(legend.position = "none")
b <- topDeaths %>%
  ggplot(aes(Country, deaths, fill = Country)) +
  geom_col(colour = "black") +
  geom_label(
    aes(y = deaths + 18000, label = comma(deaths, accuracy = 1)),
    fill = "white", alpha = 0.5
  ) +
  scale_fill_viridis_d(option = "magma", direction = 1) +
  scale_y_continuous(expand = expansion(c(0, 0.08)), labels = comma) +
  labs(y = "Total Fatalities") +
  coord_flip() +
  facet_grid(Continent~., scales= "free_y", space = "free_y") +
  theme(legend.position = "none")
cp <- glue(
  "*Comparison of the {nrow(a$data)} most impacted countries.
  Countries were ranked by total fatalities and by fatalities / million, with the {nrow(a$data)} most highly ranked across both methods are shown.
  Fatalities are shown using A) the number of fatalities scaled by population size (Fatalities / Million) and B) Total Fatalities.
  Countries are also grouped by their continent as designated by the UN classifications, and within each continent each country is ordered by Fatalities / Million.
  For fatality rates scaled by population size, it is important to realise that a value of 500 indicates that one in every 2000 people from the total population has died (ignoring demographics).
  Similarly, a value of 100 indicates that 1 in every 10,000 from the total population has died in that country.
  Whilst the US currently has the most fatalities, 1 in every {comma(round(1e6 / dplyr::filter(a$data, Country == 'US')$rate, 0))} from the US population have currently been confirmed to have died from COVID19.
  For {as.character(a$data$Country[which.max(a$data$rate)])}, 1 in every {comma(round(1e6 / max(a$data$rate), 0))} people are recorded as having died from COVID 19.
  Importantly, whilst the fatality count in countries like the US and the UK are below the statistical value of 'excess deaths' observed throughout these countries, this is not true for Belgium and the fatality rate for Belgium is more likely to reflect an accurate assessment of the true fatalities due to COVID-19.
  In contrast, the values for the US and the UK are likely to be under-estimates of the true fatalities.*"
)
plot_grid(
  a + theme(legend.position = "none"),
  b + 
    theme(
      legend.position = "none",
      axis.title.y = element_blank()
    ),
  labels = c("A", "B"),
  nrow = 1
)
*Comparison of the 26 most impacted countries.
Countries were ranked by total fatalities and by fatalities / million, with the 26 most highly ranked across both methods are shown.
Fatalities are shown using A) the number of fatalities scaled by population size (Fatalities / Million) and B) Total Fatalities.
Countries are also grouped by their continent as designated by the UN classifications, and within each continent each country is ordered by Fatalities / Million.
For fatality rates scaled by population size, it is important to realise that a value of 500 indicates that one in every 2000 people from the total population has died (ignoring demographics).
Similarly, a value of 100 indicates that 1 in every 10,000 from the total population has died in that country.
Whilst the US currently has the most fatalities, 1 in every 1,739 from the US population have currently been confirmed to have died from COVID19.
For Peru, 1 in every 1,090 people are recorded as having died from COVID 19.
Importantly, whilst the fatality count in countries like the US and the UK are below the statistical value of 'excess deaths' observed throughout these countries, this is not true for Belgium and the fatality rate for Belgium is more likely to reflect an accurate assessment of the true fatalities due to COVID-19.
In contrast, the values for the US and the UK are likely to be under-estimates of the true fatalities.*

Comparison of the 26 most impacted countries. Countries were ranked by total fatalities and by fatalities / million, with the 26 most highly ranked across both methods are shown. Fatalities are shown using A) the number of fatalities scaled by population size (Fatalities / Million) and B) Total Fatalities. Countries are also grouped by their continent as designated by the UN classifications, and within each continent each country is ordered by Fatalities / Million. For fatality rates scaled by population size, it is important to realise that a value of 500 indicates that one in every 2000 people from the total population has died (ignoring demographics). Similarly, a value of 100 indicates that 1 in every 10,000 from the total population has died in that country. Whilst the US currently has the most fatalities, 1 in every 1,739 from the US population have currently been confirmed to have died from COVID19. For Peru, 1 in every 1,090 people are recorded as having died from COVID 19. Importantly, whilst the fatality count in countries like the US and the UK are below the statistical value of ‘excess deaths’ observed throughout these countries, this is not true for Belgium and the fatality rate for Belgium is more likely to reflect an accurate assessment of the true fatalities due to COVID-19. In contrast, the values for the US and the UK are likely to be under-estimates of the true fatalities.

Fatality Rate Vs Time

minDays <- 60
minPop <- 4e6
startingPoint <- 2
df <- confirmed %>%
  inner_join(deaths) %>%
  group_by(Country, date) %>%
  summarise_at(
    vars(confirmed, deaths),
    sum
  ) %>%
  inner_join(
    wikiPops %>% dplyr::filter(Population > minPop | Country %in% alwaysShow)
  ) %>%
  ungroup() %>%
  mutate(
    `Infection Rate` = 1e6 * confirmed / Population,
    f  = 1e6 * deaths / Population
  ) %>%
  dplyr::filter(
    f > startingPoint,
    Country != "Yemen"
    # `Infection Rate` > startingPoint
    ) %>%
  group_by(Country) %>%
  mutate(
    Days = date - min(date),
    n = n()
  ) %>%
  dplyr::filter(
    n > minDays,
    max(deaths, na.rm = TRUE) > 0
  ) %>%
  mutate(
    Rate = deaths / confirmed,
    `Fatality Rate` = percent(Rate, accuracy = 0.1),
    minusT = date - max(date)
  ) %>%
  ungroup() 
plotFr <- mutate(
  df,
  Country = factor(
    Country, 
    levels = df %>%
      dplyr::select(Country, minusT, Rate) %>%
      pivot_wider(
        id_cols = Country, 
        names_from = minusT, 
        values_from = Rate
      ) %>% 
      as.data.frame() %>%
      column_to_rownames("Country") %>% 
      dist() %>% 
      hclust() %>% 
      as.dendrogram() %>% 
      labels()
  )
) %>% 
  rename_all(str_to_title) %>%
  mutate(`Population (millions)` = round(Population / 1e6, 2)) %>%
  ggplot(
    aes(
      x = Days, y = Country, fill = Rate,
      conf = Confirmed, 
      deaths = Deaths,
      date = Date,
      label = `Fatality Rate`,
      key = `Population (millions)`
      )
  ) +
  geom_raster() +
  geom_vline(
    aes(xintercept = Days + 0.5),
    data = . %>%
      dplyr::filter(Country == "Australia") %>%
      dplyr::filter(Date == max(Date)),
    linetype = 3,
    size = 1/3,
    colour = "grey70"
  ) +
  scale_fill_viridis_c(
    option = "magma"
  ) +
  scale_x_continuous(
    expand = expansion(0, 0),
    labels = abs
    ) +
  scale_y_discrete(expand = expansion(0, 0)) +
  labs(
    x = glue(
      "Days since passing {startingPoint} fatalities/million"
    ),
    y = c(),
    fill = "Fatality\nRate"
  ) +
  theme(
    panel.grid = element_blank()
  )
cpFr <- glue(
  "*Fatality Rate from confirmed cases after passing {startingPoint} fatalities / million.
  Only countries with {minDays} days of data beyond this time-point and a population size >{comma(minPop)} are shown.
  Country order is based on clustering using the most recent values.
  Countries with no recorded fatalities have been excluded for obvious reasons. 
  The dashed grey line indicates the time-point Australia is currently at.
  A clear trend of an increasing fatality rate with time is evident in many countries (e.g. Spain, France, Italy, UK), however, for some countries (e.g. Singapore) this rate appears relatively stable throughout the majority of days.
  The overall Fatality Rate for confirmed cases is currently ({percent(fr, accuracy = 0.1)}).*"
)
ggplotly(
  plotFr ,
  tooltip = c(
    "Country", "Date", "Days", "Confirmed", "Deaths",
    "Fatality Rate", "Population (millions)"
    )
  )

Fatality Rate from confirmed cases after passing 2 fatalities / million. Only countries with 60 days of data beyond this time-point and a population size >4,000,000 are shown. Country order is based on clustering using the most recent values. Countries with no recorded fatalities have been excluded for obvious reasons. The dashed grey line indicates the time-point Australia is currently at. A clear trend of an increasing fatality rate with time is evident in many countries (e.g. Spain, France, Italy, UK), however, for some countries (e.g. Singapore) this rate appears relatively stable throughout the majority of days. The overall Fatality Rate for confirmed cases is currently (3.3%).

Currently Active Infections

rr <- confirmed %>%
  group_by(Country, date) %>%
  summarise_at(vars(confirmed), sum, na.rm = TRUE) %>%
  left_join(
    recovered %>% 
      group_by(Country, date) %>%
      summarise_at(vars(recovered), sum, na.rm = TRUE)
  ) %>%
  dplyr::filter(date == max(date)) %>%
  ungroup() %>%
  summarise(rr = sum(recovered) / sum(confirmed)) %>%
  .[["rr"]]
predRR <- confirmed %>%
  group_by(Country) %>%
  dplyr::filter(
    date == max(date)
  ) %>%
  ungroup() %>%
  left_join(predRecovered) %>%
  group_by(Country, date) %>%
  summarise_at(vars(confirmed, recovered), sum, na.rm = TRUE) %>%
  ungroup() %>%
  summarise(rr = sum(recovered) / sum(confirmed)) %>%
  .[["rr"]]

Information regarding recovered cases is likely to be the least reliable of reported values as many regions do not report updated numbers for several consecutive days. Additionally many regions do not report recovered cases as the criteria for considering a person to have recovered as currently unclear. Given this:

  • The current recovery rate from all confirmed cases is 67.0%. Given the level of under-reporting this may also be highly inaccurate.
  • As there is poor and inconsistent reporting of recoveries in many countries, an alternate methodology for estimating recoveries is to use the median recovery time of 21 days to predict recovered cases, subtracting those cases which proved fatal. Under this approach 77% of cases are predicted to be recovered.
  • Using reported recoveries, 29.7% of all confirmed infections are considered as ‘active’.
  • Using predicted recoveries, this changes to 20.0% of confirmed infections predicted to be currently active.

Reported Active Infections Across All Countries

minDays <- 30
startingPoint <- 4
minPop <- 5e6
df <- confirmed %>%
  dplyr::filter(
    confirmed > 0,
    Country != "China (Other)"
  ) %>%
  left_join(deaths) %>%
  group_by(Country, date) %>%
  summarise_at(
    vars(confirmed, deaths), sum
  ) %>%
  inner_join(
    recovered %>%
      group_by(Country, date) %>%
      summarise_at(vars(recovered), sum)
  ) %>%
  mutate_at(vars(confirmed, deaths, recovered), cummax) %>%
  mutate(
    active = confirmed - deaths - recovered
  ) %>%
  dplyr::filter(!is.na(active)) %>%
  inner_join(
    dplyr::filter(wikiPops, Population > minPop | Country %in% alwaysShow)
  ) %>%
  mutate(
    rate = 1e6 * active / Population,
    pass = cummax(rate > startingPoint)
  ) %>%
  dplyr::filter(pass > 0) %>%
  group_by(Country) %>%
  mutate(
    days = date - min(date)
  ) %>%
  dplyr::filter(max(days) > minDays | Country %in% alwaysShow) %>% 
  dplyr::filter(max(recovered) > 0) %>%
  ungroup() %>%
  mutate(
    days = as.integer(days),
    rate = round(rate, 2)
  ) %>%
  arrange(date) %>%
  mutate(
    Region = str_replace_all(
      Region, "(Southern|Western) Europe", "Southern & Western Europe"
    ),
    Region = str_replace_all(
      Region, "(Central|Southern) Asia", "Central & Southern Asia"
    ),
    Region = str_replace_all(
      Region, "(Central America|Northern America|Caribbean)", "Caribbean, Central & Northern America"
    ),
    Region = str_replace_all(
      Region, "(Eastern|Middle|Southern) Africa", "Eastern, Middle & Southern Africa"
    ),
    Country = fct_inorder(Country),
    Region = fct_lump(Region, n = 11)
  ) %>%
  rename_all(str_to_title)
nDays <- max(df$Days)
p2 <- df %>%
  ggplot(
    aes(
      x = Days, y = Rate, colour = Country, 
      Date = Date, Active = Active,
      Confirmed = Confirmed, Recovered = Recovered,
      Deaths = Deaths
      )
  ) +
  geom_line() +
  scale_x_continuous(
    expand = expansion(mult = c(0, 0.05)),
  ) +
  scale_y_log10(
    expand = expansion(mult = c(0, 0.05)),
    labels = comma_format(accuracy = 1),
    breaks = 10^seq(1, 4)
  ) +
  scale_colour_manual(values = countryCols) +
  xlab(
    paste(
      "Days since passing", 
      startingPoint, 
      "confirmed active cases/million"
    )
  ) +
  ylab("Confirmed Active Infection Rate (cases/million)") + 
  facet_wrap(~Region, ncol = 3)
ggplotly(
  p2 +
    coord_cartesian(ylim = c(1, max(p2$data$Rate)))
) %>%
  shiftAxisLabel(1.8)

Confirmed active cases of COVID-19 for countries where the confirmed infection rate has exceeded 4 confirmed active cases/million for more than 229 calendar days. Only countries with a population greater than 5,000,000 are shown for better visualisation. Due to difficulties introduced by the currently reported low active infection rate outside Hubei province, data from China has been excluded from this plot, with the exception of Hubei and Hong Kong. Recovered cases are poorly and irregularly reported by many countries (e.g. Ireland, Serbia & Norway). Some countries, such as Sweden, are not reporting recovered cases and these countries have been excluded from this plot. As a result, this plot may indicate multiple instances of a sudden decline which are a simple artefact of data release schedules. To hide a country, click on the country in the plot legend. Clicking again on the country in the legend will restore the data within the plot. Countries are shown in order of the date at which they passed the 4 confirmed active case/million mark. Due to the number of countries shown, you may need to scroll through the legend. Regions of the plot are also able to be zoomed interactively. Please note the y-axis is shown on the logarithmic scale, so that a series of points which appear to be diagonal will indicate exponential growth/decay. Given the different starting point to the previous plot, data will generally be shown for fewer time-points.

Predicted Active Infections Across All Countries

df <- confirmed %>%
  dplyr::filter(
    confirmed > 0,
    Country != "China (Other)"
  ) %>%
  left_join(deaths) %>%
  group_by(Country, date) %>%
  summarise_at(
    vars(confirmed, deaths), sum
  ) %>%
  inner_join(
    predRecovered %>%
      group_by(Country, date) %>%
      summarise_at(vars(recovered), sum)
  ) %>%
  mutate_at(vars(confirmed, deaths, recovered), cummax) %>%
  mutate(
    active = confirmed - deaths - recovered
  ) %>%
  dplyr::filter(!is.na(active)) %>%
  inner_join(
    dplyr::filter(wikiPops, Population > minPop | Country %in% alwaysShow)
  ) %>%
  mutate(
    rate = 1e6 * active / Population,
    pass = cummax(rate > startingPoint)
  ) %>%
  dplyr::filter(pass > 0) %>%
  group_by(Country) %>%
  mutate(
    days = date - min(date)
  ) %>%
  dplyr::filter(max(days) > minDays | Country %in% alwaysShow) %>% 
  dplyr::filter(max(recovered) > 0) %>%
  ungroup() %>%
  mutate(
    days = as.integer(days),
    rate = round(rate, 2)
  ) %>%
  arrange(date) %>%
  mutate(
    Region = str_replace_all(
      Region, "(Southern|Western) Europe", "Southern & Western Europe"
    ),
    Region = str_replace_all(
      Region, "(Central|Southern) Asia", "Central & Southern Asia"
    ),
    Region = str_replace_all(
      Region, "(Central America|Northern America|Caribbean)", "Caribbean, Central & Northern America"
    ),
    Region = str_replace_all(
      Region, "(Eastern|Middle|Southern) Africa", "Eastern, Middle & Southern Africa"
    ),
    Country = fct_inorder(Country),
    Region = fct_lump(Region, n = 11)
  ) %>%
  rename_all(str_to_title)
nDays <- max(df$Days)
p2 <- df %>%
  ggplot(
    aes(
      x = Days, y = Rate, colour = Country, 
      Date = Date, Active = Active,
      Confirmed = Confirmed, Recovered = Recovered,
      Deaths = Deaths
      )
  ) +
  geom_line() +
  scale_x_continuous(
    expand = expansion(mult = c(0, 0.05)),
  ) +
  scale_y_log10(
    expand = expansion(mult = c(0, 0.05)),
    labels = comma_format(accuracy = 1),
    breaks = 10^seq(1, 4)
  ) +
  scale_colour_manual(values = countryCols) +
  xlab(
    paste(
      "Days since passing", 
      startingPoint, 
      "predicted active cases/million"
    )
  ) +
  ylab("Predicted Active Infection Rate (cases/million)") + 
  facet_wrap(~Region, ncol = 3)
ggplotly(
  p2 +
    coord_cartesian(ylim = c(1, max(p2$data$Rate)))
) %>%
  shiftAxisLabel(1.8)

Predicted active cases of COVID-19 for countries where the active infection rate has exceeded 4 predicted active cases/million for more than 208 calendar days. Predicted recovered cases are obtained using the median recovery time of 21 days, accounting for fatal cases. This allows for assessment of countries where recovered cases are poorly reported. Only countries with a population greater than 5,000,000 are shown for better visualisation. Due to difficulties introduced by the currently reported low active infection rate outside Hubei province, data from China has been excluded from this plot, with the exception of Hubei and Hong Kong. To hide a country, click on the country in the plot legend. Clicking again on the country in the legend will restore the data within the plot. Countries are shown in order of the date at which they passed the 4 predicted active case/million mark. Due to the number of countries shown, you may need to scroll through the legend. Regions of the plot are also able to be zoomed interactively. Please note the y-axis is shown on the logarithmic scale, so that a series of points which appear to be diagonal will indicate exponential growth/decay. Given the different starting point to the previous plot, data will generally be shown for fewer time-points.

Summary of All Rates

minPop <- 8e6
p4 <- confirmed %>%
  dplyr::filter(
    confirmed > 0
  ) %>%
  left_join(deaths) %>%
  group_by(Country, date) %>%
  summarise_at(vars(confirmed, deaths), sum) %>%
  dplyr::filter(date == max(date)) %>%
  left_join(
    recovered %>%
      group_by(Country, date) %>%
      summarise_at(vars(recovered), sum)
  ) %>%
  ungroup() %>%
  inner_join(
    wikiPops %>% dplyr::filter(Population > minPop | Country %in% alwaysShow)
  ) %>%
  mutate(rate = 1e6 * confirmed / Population) %>%
  dplyr::filter(rate > startingPoint) %>% 
  group_by(Country) %>%
  mutate(
    active = confirmed - recovered - deaths,
    active = 100*active / confirmed,
    recovered = 100*recovered / confirmed,
    fatalities = 100*deaths / confirmed
  ) %>%
  dplyr::filter(max(recovered) > 1) %>%
  ungroup() %>%
  dplyr::filter(active < 100) %>%
  arrange(active) %>% 
  mutate(Country = fct_inorder(Country)) %>%
  pivot_longer(
    cols = c(active, recovered, fatalities),
    names_to = "Status",
    values_to = "Percentage"
  ) %>%
  mutate(
    Status = str_to_title(Status),
    Status = factor(
      Status, 
      levels = c("Active", "Recovered", "Fatalities")
    ),
    Percentage = round(Percentage, 2)
  ) %>%
  mutate(confirmed = comma(confirmed)) %>%
  rename(Confirmed = confirmed) %>%
  ggplot(
    aes(
      Country, Percentage,
      fill = Status, cases = Confirmed
    )
  ) +
  geom_col() +
  scale_fill_manual(
    values = c(
      Active = "blue",
      Recovered = "green",
      Fatalities = "red"
      )
  ) +
  scale_y_continuous(expand = expansion(0, 0)) +
  coord_flip() +
  labs(x = c()) +
  theme(
    legend.position = "none"
  )
ggplotly(p4)

Fatality, Recovery and Active Infection rates for countries which have exceeded 4 confirmed cases / million, and with a population size > 8,000,000. Countries are ordered by the percentage of cases that remain active. Only countries with a reported recovery rate > 1% are shown

Australian Data

All Australian specific data has been moved to a separate page for simpler management.

R Session Information

R version 4.0.2 (2020-06-22)

Platform: x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=C, LC_NUMERIC=C, LC_TIME=C, LC_COLLATE=C, LC_MONETARY=C, LC_MESSAGES=en_AU.UTF-8, LC_PAPER=en_AU.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_AU.UTF-8 and LC_IDENTIFICATION=C

attached base packages: stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: readxl(v.1.3.1), ggfortify(v.0.4.10), QuantTools(v.0.5.7), data.table(v.1.13.0), cowplot(v.1.0.0), plotly(v.4.9.2.1), DT(v.0.15), pander(v.0.6.3), RCurl(v.1.98-1.2), rvest(v.0.3.6), xml2(v.1.3.2), jsonlite(v.1.7.0), glue(v.1.4.2), broom(v.0.7.0), ggrepel(v.0.8.2), matrixStats(v.0.56.0), scales(v.1.1.1), lubridate(v.1.7.9), magrittr(v.1.5), forcats(v.0.5.0), stringr(v.1.4.0), dplyr(v.1.0.2), purrr(v.0.3.4), readr(v.1.3.1), tidyr(v.1.1.2), tibble(v.3.0.3), ggplot2(v.3.3.2) and tidyverse(v.1.3.0)

loaded via a namespace (and not attached): Rcpp(v.1.0.5), assertthat(v.0.2.1), digest(v.0.6.25), R6(v.2.4.1), cellranger(v.1.1.0), backports(v.1.1.9), reprex(v.0.3.0), evaluate(v.0.14), highr(v.0.8), httr(v.1.4.2), pillar(v.1.4.6), rlang(v.0.4.7), lazyeval(v.0.2.2), rstudioapi(v.0.11), blob(v.1.2.1), rmarkdown(v.2.3), labeling(v.0.3), htmlwidgets(v.1.5.1), munsell(v.0.5.0), compiler(v.4.0.2), modelr(v.0.1.8), xfun(v.0.16), pkgconfig(v.2.0.3), htmltools(v.0.5.0), tidyselect(v.1.1.0), gridExtra(v.2.3), fasttime(v.1.0-2), fansi(v.0.4.1), viridisLite(v.0.3.0), crayon(v.1.3.4), dbplyr(v.1.4.4), withr(v.2.2.0), bitops(v.1.0-6), grid(v.4.0.2), gtable(v.0.3.0), lifecycle(v.0.2.0), DBI(v.1.1.0), cli(v.2.0.2), stringi(v.1.4.6), farver(v.2.0.3), fs(v.1.5.0), ellipsis(v.0.3.1), generics(v.0.0.2), vctrs(v.0.3.4), Cairo(v.1.5-12.2), tools(v.4.0.2), crosstalk(v.1.1.0.1), hms(v.0.5.3), yaml(v.2.2.1), colorspace(v.1.4-1), knitr(v.1.29) and haven(v.2.3.1)